*******************************************************************************************
*******************************************************************************************
******************** Reclassification Risk in the Small Group Health Insurance Market
******************* by Sebastian Fleitas, Gautam Gowrisankaran and Anthony Lo Sasso 
*******************************************************************************************
*******************************************************************************************
*******************************************************************************************
******************** Table C6
*******************************************************************************************
*******************************************************************************************
*******************************************************************************************
 
 

*************************************************************
****** LOAD DATABASE
************************************************************* 

clear all
set more off
set matsize 11000

**************************** GET THE ACG SCORES FOR THE SIX MONTHS ****************************

cd "/Users/sebastianfleitas/Dropbox/MULTICHOICE_PROJECT/PREMIUMS_AND_HEALTHSHOCKS/6_month ACG"

insheet using "acg2012_1-6mos.csv", clear
lookfor acg
keep patient_id rescaled_total_cost_predicted_ri
rename patient_id mbr_sys_id
rename rescaled_total_cost_predicted_ri rescaled_acg_concurrent_risk1
sort mbr_sys_id
tempfile acg2012_6months
save `acg2012_6months'

insheet using "acg2013_1-6mos.csv", clear
lookfor acg
keep patient_id rescaled_total_cost_predicted_ri
rename patient_id mbr_sys_id
rename rescaled_total_cost_predicted_ri rescaled_acg_concurrent_risk2
sort mbr_sys_id
tempfile acg2013_6months
save `acg2013_6months' 


************************************************************************************************

cd "/Users/sebastianfleitas/Dropbox/ReclassificationRisk/data_stata"
*cd "/Users/sebastianfleitas/Desktop/ReclassificationRisk"
*cd "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles"

insheet using output_RF_vfixed.csv, comma clear 
sort mbr_sys_id year
save output_RF.dta, replace 

use database_by_members_n_choiceset.dta, clear 
sort mbr_sys_id

**************************** MERGING THE ACG SCORES FOR THE SIX MONTHS ***************************

merge mbr_sys_id using `acg2012_6months'
tab _merge
drop _merge 

sort mbr_sys_id
merge mbr_sys_id using `acg2013_6months'
tab _merge
drop _merge 
 
************************************************************************************************

****** CLEAN DATABASE VARIABLES THAT WE DON'T NEED AND ERRORS IN DATA
drop rx_dum*
sort  customer_number minor_market date_month alt_id
order customer_number minor_market date_month alt_id
drop if choice!=1 
drop if alt_id==""

****** CREATE VARIABLES FOR EXPENDITURE
gen paid_sum = paid_pharmacy + net_pd_amt
gen allowed_sum = allw_amt + allowed_pharma
gen outofpocketpharma = (allowed_pharma - paid_pharmacy)
gen outofpocketmedical = oop_dircomputed 
gen outpocket = (allowed_pharma - paid_pharmacy) + (allw_amt - net_pd_amt )

gen large_claim_50 = ( paid_sum > 50000)
gen large_claim_75 = ( paid_sum > 75000)
gen large_claim_100 = ( paid_sum > 100000)

**************************************
**************************************
**** MERGING WITH RISK SCORES DATABASE 
sort mbr_sys_id year
merge mbr_sys_id year using NEW_riskscores_July2017_inc2015.dta
drop if _merge==2
drop _merge
**************************************
**************************************


**************************************
****** THIS IS WITH RANDOM FOREST 
**************************************
sort mbr_sys_id year
merge mbr_sys_id year using output_RF.dta
drop if _merge==2
drop _merge
**************************************
rename pred_totexp_2012_108var_vfixed ORS_riskscore
gen FGL = 0.9127186 + (ORS_riskscore - 6486.558) * (1.412764/10497.09)
drop ORS_riskscore
rename FGL ORS_riskscore
sum acg_riskscore ORS_riskscore
**************************************
**************************************




*********************************************************
**** REPLACE THE AGC SCORE FOR THE HALF YEARS 
replace acg_riskscore= rescaled_acg_concurrent_risk1 if year==2012
replace acg_riskscore= rescaled_acg_concurrent_risk2 if year==2013
*********************************************************


****** CREATE OTHER AUXILIARY VARIABLES 
tab year, gen(yeardum)
gen female = ( gdr_cd=="F")
tab minor_market, gen(marketdum)
gen aux=1 
* create the numer of people per firm and per plan
bys customer_number  date_month          : egen numpeople     = total(aux)
bys customer_number  date_month polnbr   : egen numpeopleplan = total(aux)
* Compute the average premium per person at firm-plan level
gen av_premiumpc_plan = (total_premium*12)/numpeopleplan
*********************************************************************************************

**********************************************************************************************
**********************************************************************************************
**********************************************************************************************
**********************************************************************************************
***** DROP ERRORS IN MEMBERS (NOT MERGED)
egen auxgroup = group( customer_number mbr_sys_id )
duplicates tag auxgroup  year, generate(check)
drop if check==19
drop check
**********************************************************************************************
**********************************************************************************************
**********************************************************************************************
**********************************************************************************************


*********************************************************************************************
*********************************************************************************************
*********************************************************************************************
**** KEEPING ONLY THE FIRMS THAT WILL BE ULTIMATELY USED FOR REGRESSION
*********************************************************************************************
sort customer_number year
merge customer_number year using firmsinsample_July2017.dta
*drop 8 firms without complete obervations for two years
drop if customer_number=="01H5518"	
drop if customer_number=="0253935"	
drop if customer_number=="0268272"
drop if customer_number=="0386663"
drop if customer_number=="0466732"
drop if customer_number=="06H2419"	
drop if customer_number=="0765480"	
drop if customer_number=="08M6991"
*drop if firmstays!=1
drop _merge
*********************************************************************************************
*********************************************************************************************
*********************************************************************************************

tab year, gen(yeardummy)

**********************************************************************************************
**********************************************************************************************
**********************************************************************************************
**********************************************************************************************
*********************************************************************************************
******** HERE ALL THE REGRESSIONS AT THE INDIVIDUAL LEVEL FOR EMPLOYEES
*********************************************************************************************
xtset mbr_sys_id year
set more off

**********************************************************************************************
**********************************************************************************************
******************** Estimation of Pass-Through from risk to claims 
**********************************************************************************************
**********************************************************************************************

gen laggedscore = acg_riskscore
gen laggedscore_ORS = L.ORS_riskscore




**********************************************************************************************
**********************************************************************************************
******************** DESCRIPTIVE STATISTICS FOR FINAL SAMPLE 
**********************************************************************************************
**********************************************************************************************

xtset mbr_sys_id year

gen lagged_code_hypertension = L.code_hypertension 
gen lagged_code_heartfailure = L.code_heartfailure 
gen lagged_code_chronickidneydisease = L.code_chronickidneydisease 
gen lagged_code_atrialfibrilization= L.code_atrialfibrilization 
gen lagged_code_asthma =L.code_asthma

gen lagged_chronic_cancer     = L.code_cancer
gen lagged_chronic_transplant = L.code_transplant
gen lagged_chronic_ami        = L.code_ami
gen lagged_chronic_diabetes   = L.code_diabetes

gen lagged_code_brainhemorr = L.code_brainhemorr
gen laggeed_code_respfailure = L.code_respfailure


****************************************************
****** CREATING Rp BEFORE DROPPING DATA 2012 ******* 
****************************************************

bys customer_number  year        : egen mean_pred_riskscore_rp_aux = mean(acg_riskscore)
xtset mbr_sys_id year
gen mean_pred_riskscore_rp = mean_pred_riskscore_rp_aux

bys customer_number  year        : egen mean_ORS_riskscore_rp_aux = mean(ORS_riskscore)
xtset mbr_sys_id year
gen mean_ORS_riskscore_rp = L.mean_ORS_riskscore_rp_aux

bys prime_plan_code  year        : egen mean_pred_policy_rp_aux = mean(acg_riskscore)
xtset mbr_sys_id year
gen mean_pred_policy_rp = L.mean_pred_policy_rp_aux

bys prime_plan_code  year        : egen mean_ORS_policy_rp_aux = mean(ORS_riskscore)
xtset mbr_sys_id year
gen mean_ORS_policy_rp  = L.mean_ORS_policy_rp_aux
****************************************************
****************************************************



****************************************************

****************************************************
/*
****** FOR USIC other database STUDY******* 
****************************************************
bys customer_number  year        : egen mean_pred_USIC_rp_aux = mean(usic_score)
xtset mbr_sys_id year
gen mean_pred_USIC_rp =L.mean_pred_USIC_rp_aux
*/
****************************************************

*exit


****************************************************
**** Other variables 
****************************************************
gen laggedclaims = L.paid_sum
gen laggedoutpocket = L.outpocket
gen laggedallowed_sum = L.allowed_sum

drop if firmstays!=1 & year!=2012


tab rel_cd , gen(relationship)

egen in2012 = total(laggedclaims!=. & year==2013), by(mbr_sys_id)
egen in2013 = total(year==2013), by(mbr_sys_id)
egen in2014 = total(year==2014), by(mbr_sys_id)
egen in2015 = total(year==2015), by(mbr_sys_id)
gen only2013 = (in2015!=1 & in2014!=1 & in2013==1 & in2012!=1)
gen only2014 = (in2015!=1 & in2014==1 & in2013!=1 & in2012!=1)
gen only2015 = (in2015==1 & in2014!=1 & in2013!=1 & in2012!=1)
gen exjoiners_201314no2012 =      (in2015!=1 & in2014==1 & in2013==1 & in2012!=1)
gen exquitters_201213no2014 =     (in2015!=1 & in2014!=1 & in2013==1 & in2012==1)
gen exstayers_20121314no2015 =    (in2015!=1 & in2014==1 & in2013==1 & in2012==1)
gen newjoiners2013_stayers2015 =  (in2015==1 & in2014==1 & in2013==1 & in2012!=1)
gen newjoiners2014 =              (in2015==1 & in2014==1 & in2013!=1 & in2012!=1)
gen newstayers_allyears =         (in2015==1 & in2014==1 & in2013==1 & in2012==1)
gen everyone_onlyoneyear_aux = only2013 + only2014 + only2015
gen nocathegory = 1- everyone_onlyoneyear_aux - exjoiners_201314no2012 - exquitters_201213no2014 - exstayers_20121314no2015 - newjoiners2013_stayers2015 - newjoiners2014 - newstayers_allyears
gen everyone_onlyoneyear_scattered = nocathegory + everyone_onlyoneyear_aux

************************************************************
***** Generate Stayers, Joiners and Quitters 
************************************************************
* case no joiner, no quitter no stayer mbr_sys_id==231551255

* aux variables 
bys mbr_sys_id: egen max_year= max(year)
bys mbr_sys_id: egen min_year= min(year)
gen complete_obs_aux = 1 if laggedscore!=.
gen no_2013_completeObs =  1 if year==2013 & complete_obs_aux!=1 
bys mbr_sys_id: egen complete_obs = max(complete_obs_aux)
** define joiner 
gen joiner_aux = 1 if (min_year!=2013 & complete_obs==1) | (min_year==2013 & no_2013_completeObs==1 & complete_obs==1)
bys mbr_sys_id: egen joiner = max(joiner_aux)
** define quitter
gen quitter  = 1 if max_year!=2015 & complete_obs==1


********** TABLE DESCRIPTIVE STATISTICS 
egen ind = group(mbr_sys_id) if newstayers_allyears==1 | joiner == 1 | quitter == 1
egen ind_stayers = group(mbr_sys_id)  if newstayers_allyears==1
egen ind_joiners = group(mbr_sys_id)  if joiner == 1
egen ind_quitters = group(mbr_sys_id) if quitter == 1
sum ind ind_* 

******* CREATE AUTOMATIC TABLE 
  foreach Y in lagged_chronic_cancer lagged_chronic_ami lagged_chronic_transplant lagged_chronic_diabetes lagged_code_hypertension lagged_code_heartfailure  lagged_code_asthma {
  gen `Y'_100 = `Y'*100
  }
gen chronickidneydisease_100 = lagged_code_chronickidneydisease*100

gen employee_100 = (relationship2==1)
gen spouse_100 = (relationship6==1)
gen children_100 = (relationship1==1)
gen others_100 =    ( relationship3==1 | relationship4==1 | relationship5==1)
gen female_100 = female*100

xtset mbr_sys_id year



**********************************************************************************************
*************************   CREATING AUXILIARY VARIABLES  
**********************************************************************************************
**********************************************************************************************
set more off 

bys cust_seg_nbr year: egen sd_age = sd(age)
egen mean_sd_age = mean(sd_age)

bys cust_seg_nbr year: egen sd_acg_riskscore = sd(acg_riskscore)
egen mean_sd_acg_riskscore = mean(sd_acg_riskscore)

browse mean_sd_age mean_sd_acg_riskscore

**********************************************************************************************
**********************************************************************************************

egen firm = group(customer_number)
egen market = group(minor_market)
quietly: tab(market), gen(dum_market)
quietly: tab(age), gen(dum_age)
quietly: tab(sic_cd), gen(dum_sector)


gen hhi2013=0
replace hhi2013 =  6668  if minor_market=="DELAWARE MINOR"
replace hhi2013 =  1927  if minor_market=="ARKANSAS MINOR"
replace hhi2013 =  3158  if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace hhi2013 =  1398  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace hhi2013 =  3313  if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace hhi2013 =  7159  if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace hhi2013 =  4087  if minor_market=="OKLAHOMA MINOR"
replace hhi2013 =  4895  if minor_market=="TENNESSEE MINOR"
replace hhi2013 =  5165  if minor_market=="WISCONSIN MINOR"
replace hhi2013 =  4104  if minor_market=="WYOMING MINOR"

gen hhi2014=0
replace hhi2014 = 5631   if minor_market=="DELAWARE MINOR"
replace hhi2014 = 4816  if minor_market=="ARKANSAS MINOR"
replace hhi2014 = 3401  if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace hhi2014 = 1624  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace hhi2014 = 4075 if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace hhi2014 = 2700 if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace hhi2014 = 4633  if minor_market=="OKLAHOMA MINOR"
replace hhi2014 = 4968 if minor_market=="TENNESSEE MINOR"
replace hhi2014 = 1638  if minor_market=="WISCONSIN MINOR"
replace hhi2014 = 4320 if minor_market=="WYOMING MINOR"

gen hhi2015=0
replace hhi2015 = 5385 if minor_market=="DELAWARE MINOR"
replace hhi2015 = 5236 if minor_market=="ARKANSAS MINOR"
replace hhi2015 = 3715 if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace hhi2015 = 2315  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace hhi2015 = 4714 if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace hhi2015 = 2912 if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace hhi2015 = 5460  if minor_market=="OKLAHOMA MINOR"
replace hhi2015 = 4744 if minor_market=="TENNESSEE MINOR"
replace hhi2015 = 1443  if minor_market=="WISCONSIN MINOR"
replace hhi2015 = 5160 if minor_market=="WYOMING MINOR"

gen leader2013=0
replace leader2013 =  0.81  if minor_market=="DELAWARE MINOR"
replace leader2013 =  0.23 if minor_market=="ARKANSAS MINOR"
replace leader2013 =  0.48 if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace leader2013 =  0.21  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace leader2013 =  0.54 if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace leader2013 =  0.84 if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace leader2013 =  0.6 if minor_market=="OKLAHOMA MINOR"
replace leader2013 =  0.67 if minor_market=="TENNESSEE MINOR"
replace leader2013 =  0.7 if minor_market=="WISCONSIN MINOR"
replace leader2013 =  0.6 if minor_market=="WYOMING MINOR"

gen leader2014=0
replace leader2014 = 0.69  if minor_market=="DELAWARE MINOR"
replace leader2014 = 0.65  if minor_market=="ARKANSAS MINOR"
replace leader2014 = 0.52 if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace leader2014 = 0.23  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace leader2014 = 0.6  if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace leader2014 = 0.44 if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace leader2014 = 0.65  if minor_market=="OKLAHOMA MINOR"
replace leader2014 = 0.67 if minor_market=="TENNESSEE MINOR"
replace leader2014 = 0.28  if minor_market=="WISCONSIN MINOR"
replace leader2014 = 0.62  if minor_market=="WYOMING MINOR"

gen leader2015=0
replace leader2015 = 0.65  if minor_market=="DELAWARE MINOR"
replace leader2015 = 0.69  if minor_market=="ARKANSAS MINOR"
replace leader2015 = 0.55  if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace leader2015 = 0.34   if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace leader2015 = 0.66  if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace leader2015 = 0.45  if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace leader2015 = 0.72  if minor_market=="OKLAHOMA MINOR"
replace leader2015 = 0.65  if minor_market=="TENNESSEE MINOR"
replace leader2015 = 0.29  if minor_market=="WISCONSIN MINOR"
replace leader2015 = 0.69  if minor_market=="WYOMING MINOR"

gen numfirms5more2013=0
replace numfirms5more2013 = 2  if minor_market=="DELAWARE MINOR"
replace numfirms5more2013 = 5  if minor_market=="ARKANSAS MINOR"
replace numfirms5more2013 = 4  if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace numfirms5more2013 = 7  if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace numfirms5more2013 = 3  if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace numfirms5more2013 = 2  if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace numfirms5more2013 = 4  if minor_market=="OKLAHOMA MINOR"
replace numfirms5more2013 = 3  if minor_market=="TENNESSEE MINOR"
replace numfirms5more2013 = 3 if minor_market=="WISCONSIN MINOR"
replace numfirms5more2013 = 4 if minor_market=="WYOMING MINOR"

gen numfirms5more2014=0
replace numfirms5more2014 = 2  if minor_market=="DELAWARE MINOR"
replace numfirms5more2014 = 3  if minor_market=="ARKANSAS MINOR"
replace numfirms5more2014 = 4 if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace numfirms5more2014 = 7 if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace numfirms5more2014 = 4 if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace numfirms5more2014 = 5  if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace numfirms5more2014 = 4  if minor_market=="OKLAHOMA MINOR"
replace numfirms5more2014 = 3  if minor_market=="TENNESSEE MINOR"
replace numfirms5more2014 = 4 if minor_market=="WISCONSIN MINOR"
replace numfirms5more2014 = 4  if minor_market=="WYOMING MINOR"

gen numfirms5more2015=0
replace numfirms5more2015 = 2  if minor_market=="DELAWARE MINOR"
replace numfirms5more2015 = 3  if minor_market=="ARKANSAS MINOR"
replace numfirms5more2015 = 4 if minor_market=="CENTRAL TEXAS MINOR" | minor_market=="DALLAS MINOR" | minor_market=="HOUSTON MINOR"  | minor_market=="NORTH TEXAS MINOR" | minor_market=="SOUTH TEXAS MINOR" 
replace numfirms5more2015 = 5 if minor_market=="EAST PENNSYLVANIA MINOR" | minor_market=="PENNSYLVANIA MINOR" | minor_market=="WEST PENNSYLVANIA MINOR"
replace numfirms5more2015 = 4 if minor_market=="ILLINOIS MINOR" | minor_market=="RV ILLINOIS MINOR" | minor_market=="SOUTHERN IL MINOR" 
replace numfirms5more2015 = 5  if minor_market=="KANSAS CITY MO MINOR" | minor_market=="MISSOURI MINOR"
replace numfirms5more2015 = 4  if minor_market=="OKLAHOMA MINOR"
replace numfirms5more2015 = 3 if minor_market=="TENNESSEE MINOR"
replace numfirms5more2015 = 6 if minor_market=="WISCONSIN MINOR"
replace numfirms5more2015 = 4  if minor_market=="WYOMING MINOR"


*cd "/Users/sebastianfleitas/Desktop/ReclassificationRisk"
*save datasetindviduals_restat_revisions.dta, replace 


* aux variables for new selection equastions 
**************************************************************************************************
bys cust_seg_nbr year: egen firm_riskscore = mean(acg_riskscore)
egen firmfe= group(customer_number)
bys customer_number: egen max_year_firm =  max(year)
gen interaction = firm_riskscore*acg_riskscore
sort  mbr_sys_id year
xtset mbr_sys_id year 
gen exit_new = 1 if mbr_sys_id[_n]!=mbr_sys_id[_n+1]
replace exit_new=0 if exit_new==.

* aggregating data to the firm level
**********************************************************************************************
set more off

*compute the mean of age and gender in the pool 
bys customer_number  year        : egen mean_age = mean(age)
bys customer_number  year        : egen mean_female = mean(female)

gen employee = (rel_cd=="EE")
bys customer_number  year        : egen mean_employee = mean(employee)

gen spouse = (rel_cd=="SP")
bys customer_number  year        : egen mean_spouse = mean(spouse)

gen children = (rel_cd=="CH")
bys customer_number  year        : egen mean_children = mean(children)

gen others = (rel_cd=="NB" | rel_cd=="SC" | rel_cd=="SD" )
bys customer_number  year        : egen mean_others = mean(others)

* create variables R 
**************************************************************************************
bys customer_number  year        : egen mean_pred_riskscore = mean(laggedscore)
bys customer_number  year        : egen mean_ORS_riskscore = mean(laggedscore_ORS)

bys prime_plan_code  year        : egen mean_pred_policy = mean(laggedscore)
bys prime_plan_code  year        : egen mean_ORS_policy = mean(laggedscore_ORS)

bys customer_number  year        : egen mean_pred_riskscore_sd = sd(laggedscore)
replace mean_pred_riskscore_sd=0 if mean_pred_riskscore_sd==.


**************************************************************************************
bys customer_number  year        : egen mean_pred_riskscore_current = mean(acg_riskscore)
bys customer_number  year        : egen mean_pred_riskscore_stayers_aux = mean(laggedscore) if newstayers_allyears==1
bys customer_number  year        : egen mean_pred_riskscore_stayers = mean(mean_pred_riskscore_stayers_aux) 
****
bys customer_number  year        : egen mean_paid           = mean(laggedclaims)
bys prime_plan_code  year        : egen mean_paid_policy    = mean(laggedclaims)

bys customer_number  year        : egen mean_outpocket           = mean(laggedoutpocket)
bys customer_number  year        : egen mean_allowed_sum           = mean(laggedallowed_sum)
****
bys customer_number  year        : egen mean_paid_current           = mean(paid_sum)
bys customer_number  year        : egen mean_premium        = mean(av_premiumpc_plan)
bys prime_plan_code  year        : egen mean_premium_policy        = mean(av_premiumpc_plan)

gen aux11=1
bys prime_plan_code  year        : egen people_policy        = sum(aux11)
bys customer_number  year        : egen people_plan        = sum(aux11)

histogram people_policy
histogram people_plan

*********************
sort mbr_sys_id year
gen stayer = 1 if mbr_sys_id[_n] == mbr_sys_id[_n-1] & customer_number[_n]==customer_number[_n-1]
replace stayer=0 if stayer==.
bys customer_number year : egen percentage_stayers_aux = sum(stayer)
gen percentage_stayers = percentage_stayers_aux/numpeople
*********************

drop if year>2013

/*
cd "/Users/sebastianfleitas/Dropbox/ReclassificationRisk"
save datasetindviduals_restat_revisions_June2022_6months.dta, replace 
*/


**************************************************************************************************
preserve 
gen indirobs=1 
drop if year!=2013
collapse (mean) indirobs, by(customer_number)
tempfile directoriofirmas
save `directoriofirmas'
restore 

sort customer_number
merge customer_number using `directoriofirmas'
bys customer_number: egen max_indirobs =  max(indirobs)
drop if max_indirobs!=1 

**************************************************************************************************


**************************************************************************************************
** BEGINING OF USIC ESTIMATES
** TWO STEP REGRESSION USING THIS SAMPLE (ROBUSTNESS ON LEAVING THE SAMPLE) **
**************************************************************************************************
tsset mbr_sys_id year
egen stderrorscluster = group(customer_number year)
gen SEX = (gdr_cd=="F")
gen industry = substr(sic_cd,1,1)
tab industry, gen(industy_dummy)
**************************************************************************************************
local conditions "lagged_code_hypertension lagged_code_heartfailure lagged_chronic_ami laggeed_code_respfailure  lagged_code_brainhemorr lagged_chronic_cancer lagged_chronic_diabetes lagged_code_asthma" 
mkspline reg16 1 reg17 = mean_pred_riskscore_rp 
*if year==2015 /*making the spline on the delta for 2015, cross section estimate */
*gen reg1 =  mean_pred_riskscore_rp 
gen reg2 = laggedscore
gen reg3 = age 
gen reg4 = SEX
gen reg5 = industy_dummy1
gen reg6 = industy_dummy2
gen reg7 = industy_dummy3
gen reg8 = industy_dummy4 
gen reg9 = industy_dummy5
gen reg10 = industy_dummy6
gen reg11 = industy_dummy7
gen reg12 = industy_dummy8
gen reg13 = industy_dummy9
gen reg14 = industy_dummy10
gen reg15 = numpeople
probit exit_new reg* if year==2013, cluster(stderrorscluster) noconstant
forvalues i=2/17 {
scalar coeffprobit`i' = _b[reg`i']
}
*probit exit_new laggedscore mean_pred_riskscore_rp age SEX industy_dummy* numpeople if year==2014, cluster(stderrorscluster) noconstant
local n_probit = e(N)
matrix V = e(V)* `n_probit'
*ivprobit exit_new (laggedscore mean_pred_riskscore_rp = laggedscore_ORS mean_ORS_riskscore_rp) age SEX industy_dummy* numpeople if year==2014, cluster(stderrorscluster) 
margins, dydx(*)
predict linear_index, xb
gen prob_leaving = normal(linear_index)
gen prob_leaving2 = prob_leaving^2
gen prob_leaving3 = prob_leaving^3
gen prob_leaving4 = prob_leaving^4
gen prob_leaving5 = prob_leaving^5
gen prob_leaving6 = prob_leaving^6 
tsset mbr_sys_id year
***** *****



**************************************************************************************************
**************************************************************************************************
**************************************************************************************************
**************************************************************************************************


set more off
cap drop sample
**************************************************************************************************
** PANEL A: Fixed effects estimates 
**************************************************************************************************
reghdfe mean_premium mean_pred_riskscore_rp  yeardum* , absorb(customer_number) vce(cluster customer_number year)
gen sample = e(sample)
drop if sample!=1 
reghdfe mean_premium mean_pred_riskscore_rp  yeardum* if sample==1 , absorb(mbr_sys_id) vce(cluster customer_number year)
eststo mod1  
reghdfe mean_premium mean_pred_riskscore_rp prob_leaving yeardum* , absorb(mbr_sys_id) vce(cluster customer_number year)
eststo mod2
reghdfe mean_premium mean_pred_riskscore_rp prob_leaving* yeardum* , absorb(mbr_sys_id) vce(cluster customer_number year)
eststo mod3
********
****************************************************************************************************************** fragment
*********TABLE PANEL A 
esttab mod1 mod2 mod3 using Restat_Table6_6months.tex , replace label   nonotes nonumbers stats(N, fmt(%12.0fc)) compress cells(b(star fmt(%12.0fc  ) label(" ")) se(fmt( %12.0fc ) label(" ") par) ) collabels(none) nogaps  ///
		title(Main Estimates) mtitles("I" "II" "III" ) ///
		star(* 0.10 ** 0.05 *** 0.01)  ///
		keep( mean_pred_riskscore_rp ) ///
		prefoot( \hline  "Year FE & Yes & Yes & Yes" \\ ///
		"Polynomial Order & No  & First & Sixth" \\ ///
		"Observations & 2013 & 2013 & 2013 & 2013 "\\ \hline) 
eststo clear
*********************************************************************************************


**************************************************************************************************
** PANEL B: No Fixed effects estimates 
**************************************************************************************************
reghdfe  mean_premium mean_pred_riskscore_rp  marketdum* yeardum* if sample==1 ,  noabsorb vce(cluster customer_number year)
eststo mod1  
reghdfe  mean_premium mean_pred_riskscore_rp  marketdum* prob_leaving yeardum* if sample==1 ,  noabsorb vce(cluster customer_number year)
eststo mod2
reghdfe  mean_premium mean_pred_riskscore_rp  marketdum* prob_leaving* yeardum* if sample==1 ,  noabsorb vce(cluster customer_number year)
eststo mod3
****************************************************************************************************************** fragment
*********TABLE PANEL B
esttab mod1 mod2 mod3 using Restat_Table6_PanelB_6months.tex , replace label   nonotes nonumbers stats(N, fmt(%12.0fc)) compress cells(b(star fmt(%12.0fc  ) label(" ")) se(fmt( %12.0fc ) label(" ") par) ) collabels(none) nogaps  ///
		title(Main Estimates) mtitles("I" "II" "III" ) ///
		star(* 0.10 ** 0.05 *** 0.01)  ///
		keep( mean_pred_riskscore_rp ) ///
		prefoot( \hline  "Year FE & Yes & Yes & Yes" \\ ///
		"Polynomial Order & No  & First & Sixth" \\ ///
		"Observations & 2013 & 2013 & 2013 & 2013 "\\ \hline) 
eststo clear
*********************************************************************************************


exit 
*** Do the table
** 0_November2018_Table6_Paper.tex
collapse (mean) mean_premium mean_pred_riskscore_rp  , by(customer_number year)
tab(year), gen(yeardum)

reghdfe  mean_premium mean_pred_riskscore_rp yeardum*  ,  absorb(customer_number) vce(cluster customer_number year)
reghdfe  mean_premium mean_pred_riskscore_rp yeardum*  ,  noabsorb vce(cluster customer_number year)


